rm(list=ls())

library(xts)
library(glmnet)
library(xlsx)
library(dplyr)
library(tidyr)
library(glmnetUtils)
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
get_alpha <- function(fit) {
  alpha <- fit$alpha
  error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
  alpha[which.min(error)]
}

# Get all parameters.
get_model_params <- function(fit) {
  alpha <- fit$alpha
  lambdaMin <- sapply(fit$modlist, `[[`, "lambda.min")
  lambdaSE <- sapply(fit$modlist, `[[`, "lambda.1se")
  error <- sapply(fit$modlist, function(mod) {min(mod$cvm)})
  best <- which.min(error)
  data.frame(alpha = alpha[best], lambdaMin = lambdaMin[best],
             lambdaSE = lambdaSE[best], eror = error[best])
}


#load("0912_Download_ALLData.rda")
load("incident_hosp_cases.rda")
state_cases_all<-state_cases
# plot(na.omit(state_hosp[,'CA']),main = "New Confirmed COVID-19 Hospital Admission in Calnifornia")
# plot(gtdataALL_IQR_noMedia[['US-CA']][,'how.long.contagious'],main = "Google Search Frequency of Term how.long.contagious in Calnifornia")
#vaccination
vaccination <- read.csv('COVID-19_Vaccinations_in_the_United_States_County.csv', check.names=FALSE,as.is=TRUE)
vaccin_selected<- cbind(vaccination[1],vaccination[5], vaccination[17])
colnames(vaccin_selected)[2]<- 'State'
vaccin_selected<-aggregate(vaccin_selected[3], by=list(Category=vaccin_selected$State, vaccin_selected$Date), FUN=max)
colnames(vaccin_selected)[2] <- 'Date'
vaccin_selected$Date<- anytime::anydate(vaccin_selected$Date)
colnames(vaccin_selected)[1]<- 'State'
vaccin_all<-vaccin_selected%>%group_by(State)%>%spread(State,Series_Complete_Pop_Pct)
vaccin_all<-xts(vaccin_all[,-1],order.by = vaccin_all$Date)
storage.mode(vaccin_all)<-'numeric'
vaccin_all<-100-vaccin_all
vaccin_append<-xts(matrix(100,nrow = length(index(state_hosp)),ncol = length(colnames(vaccin_all))),order.by = index(state_hosp))
colnames(vaccin_append)<-colnames(vaccin_all)
vaccin_append<-vaccin_append[index(vaccin_append)<index(vaccin_all)[1]]
vaccin_all<-rbind(vaccin_all,vaccin_append)


#neighboring states
neigh_states<-read.csv('neighbors-states.csv')
find_neighboring_states<-function(my_state){
  
  curr_neigh_states<-as.matrix(neigh_states[neigh_states[,1]==my_state,2])
  curr_neigh_states<-rbind(curr_neigh_states,as.matrix(neigh_states[neigh_states[,2]==my_state,1]))
  return(curr_neigh_states)
}

# Daily Indicator (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday)
Days_of_Week = matrix(0,dim(state_hosp)[1],7)
Days_of_Week = xts(Days_of_Week, index(state_hosp))
colnames(Days_of_Week) = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
for (i in 1:7){
  Days_of_Week[which(weekdays(index(Days_of_Week)) == colnames(Days_of_Week)[i]),i] = 1
}

load("gt_data_new.rda")


#terms_important<-names(MedRaw_optLag)[1:4]
terms_important<-c("how.long.contagious",
                   "loss.of.smell","loss.of.taste","X.covid.19.vaccine.","X.Cough." ,"pneumonia",
                   "how.long.covid.19","sinus",
                   "symptoms.of.the.covid.19", "contagious.coronavirus","X.coronavirus.vaccine.")
if(FALSE){
  plot_dates<-seq(as.Date("2020-07-15"),as.Date("2021-10-15"),by=7)
  visual_data<-na.omit(cbind(state_hosp[plot_dates,'CA'],gtdataALL_IQR_noMedia[['US-CA']][plot_dates,"how.long.covid.19"]))
  visual_data[,2]<-visual_data[,2]*(mean(visual_data[,1])/mean(visual_data[,2]))
  
  plot(visual_data)
}
gtdata_stateMixed_IQR_noMedia<-gtdataALL_IQR_noMedia
###########################################################################################################################################################
# 51 STATES 
n_train <- 56 
n_train_minimum <- 40
list_results_step1.State <- list()
coef_all_full_model_lasso <- list()
coef_smooth_step1.State <- list()
coef_all_AR7_LR<-list()
coef_all_AR7_ARIMA<-list()
coef_all_full_model_ridge<-list()
coef_all_full_model_no_gt_ridge<-list()
coef_all_full_model_net<-list()

ts_max_lag <- 1:7  #lag a week
case_lag <- c(7,28) #a week and two weeks
vacc_lag <- c(7)
#gt_lag = case_lag
#mobility_lag <- c(14,21,28,35)
decay <- 0.8
pred_week = 1:14
nfolds<-10
state_names<-colnames(state_hosp)[c(-2,-19,-28)] # get rid of AS PR VI
alphas<-c(0,0.5,1)# searching through lasso, ridge and elastic net
#load("0912_Download_ALLData.rda")
# state-level google search terms and hosp
if(FALSE){
  library(RColorBrewer)
  pdf("state_gt_hosp_nat_new.pdf")
  for (region in state_names) {
    curr_hosp<-state_hosp[,region]
    neighboring_state<-find_neighboring_states(region)
    neighboring_state<-paste("US-",neighboring_state,sep = "")
    region<-paste("US-",region,sep = "")
    temp_terms = names(gtdata_stateMixed_IQR_noMedia[[region]])[names(gtdata_stateMixed_IQR_noMedia[[region]])%in% terms_important]
    temp_terms<-terms_important
    #temp_term[4]<-"loss.of.taste"
    #selected gt terms
    gt_nat <- gtdata_stateMixed_IQR_noMedia[[region]][,temp_terms]
    # for(i in 1:length(neighboring_state)){
    #   gt_nat<-gt_nat+gtdata_stateMixed_IQR_noMedia[[neighboring_state[i]]][,temp_terms]
    # }
    
    colnames(gt_nat)<-paste("state-",colnames(gt_nat),sep = "")
    nat<-gtdata_stateMixed_IQR_noMedia[[region]][,temp_terms]
    colnames(nat)<-paste("National-",colnames(nat),sep = "")
    nat<-stats::lag(rollmean(nat,7),3)
    gt_nat<-stats::lag(rollmean(gt_nat,7),3)
    
    visual_data<-na.omit(cbind(curr_hosp,nat))
    for(i in 2:ncol(visual_data)){
      visual_data[,i]<-visual_data[,i]*(mean(visual_data[,1])/mean(visual_data[,i]))
    }
    
    brewer<-brewer.pal(ncol(visual_data), "Dark2")
    brewer[1]<-'black'
    dates_select<-seq(as.Date("2020-09-01"),as.Date("2021-08-01"),by=1)
    plot(visual_data[dates_select],col=brewer,main = paste("State state_hosp and GT"," of ",region))
    print(addLegend("bottomleft",legend.names =colnames(visual_data),col=brewer,
                    lty=c(1,1),
                    lwd=c(2,2),
                    ncol=2,
                    bg="white"))
  }
  dev.off()
}
#region<-"TX"
for (region in state_names[49:51]){
  print(region)
  #target
  pred_target = state_hosp[,region]
  #cases
  if(region=="PR"){state_name="Puerto Rico"
  }else if(region=="DC"){state_name="Washington"
  }else{state_name=state.name[which(state.abb == region)]}
    
  state_cases<-state_cases_all[,region]
  
  # vaccination
  vaccin_state <- vaccin_all[,region]
  storage.mode(vaccin_state)<-'numeric'
  #neighboring hosp
  #neighboring states
  neighboring_state<-find_neighboring_states(region)
  hosp_neighboring<-xts(rep(0,length(index(pred_target))),order.by = index(pred_target))#if no neighboring state exist, it equals 0
  if (length(neighboring_state)!=0){
    for(i in neighboring_state){
      for (j in colnames(state_hosp)){
        if (i==j){
          neighboring_hosp_state<-state_hosp[,j]
          hosp_neighboring <- cbind(hosp_neighboring, neighboring_hosp_state)
        }
      }
      
    }
    hosp_neighboring <- hosp_neighboring[,-1]
  }
  
  
  region<-paste("US-",region,sep = "")
  
  temp_terms = names(gtdata_stateMixed_IQR_noMedia[[region]])[names(gtdata_stateMixed_IQR_noMedia[[region]])%in% terms_important]
  #selected gt terms
  gt_nat <- gtdata_stateMixed_IQR_noMedia[[region]][,temp_terms]
  neighboring_state<-paste("US-",neighboring_state,sep = "")
  for(s in 1:length(neighboring_state)){
    gt_nat<-gt_nat+gtdata_stateMixed_IQR_noMedia[[neighboring_state[s]]][,temp_terms]
  }
  GT<-gtdata_stateMixed_IQR_noMedia[["US"]][,temp_terms]
  colnames(GT)<-paste("US.",colnames(GT),sep="")
  gt_nat<-cbind(gt_nat,GT)
  
  gt_nat<-stats::lag(rollmean(gt_nat,7),3)
  gt_lag <- MedRaw_optLag[temp_terms]
  temp_terms<-colnames(gt_nat)
  gt_lag<-rep(gt_lag,2)
  names(gt_lag)[(length(gt_lag)-ncol(GT)+1):length(gt_lag)]<-colnames(GT)
  #if only use national google search terms
  if(TRUE){
  temp_terms = names(gtdata_stateMixed_IQR_noMedia[[region]])[names(gtdata_stateMixed_IQR_noMedia[[region]])%in% terms_important]
  gt_nat<-gtdata_stateMixed_IQR_noMedia[["US"]][,temp_terms]
  gt_lag <- MedRaw_optLag[temp_terms]
  }
  
  for(n_forward in pred_week){
    
    #x_gt <- cbind(
    #stats::lag(gt_nat, pmax(gt_lag, n_forward))  
    #,stats::lag(gt_nat, n_forward)  
    #)
    x_ar <- stats::lag(pred_target, ts_max_lag + n_forward - 1)
    x_cases <- stats::lag(state_cases, pmax(case_lag, n_forward))
    x_vacc<-stats::lag(vaccin_state,pmax(vacc_lag,n_forward))
    
    x_gt <- lapply(temp_terms,function(i){stats::lag(gt_nat[,i],pmax(gt_lag[i],n_forward) )})
    x_gt = do.call(cbind,x_gt)
    # x_gt<-NA
    # for(term in names(gt_lag)){
    #   print(term)
    #   x_gt<-cbind(x_gt,stats::lag(gt_nat[,term],max(gt_lag[term],n_forward) ))
    # }
    # x_gt<-x_gt[,-1]
    x_neighbor<-hosp_neighboring
    #x_mobility = stats::lag(gt_mobility, pmax(mobility_lag, n_forward))
    x_curr <- na.omit(merge(x_ar, 
                            x_cases, 
                            x_vacc,
                            x_gt , x_neighbor ,Days_of_Week)) 
    #exclude google search terms
    x_curr2 <- na.omit(merge(x_ar, x_cases, 
                             x_vacc , 
                             x_neighbor ,Days_of_Week)) 
    #x_curr <- na.omit(merge(x_ar, x_cases, x_gt , x_mobility,  Days_of_Week)) 
    common_id_curr <- index(na.omit(merge(x_curr, pred_target, all = F)))  #get the lagged data's date, the dates that we will train and test for (dont have NA)
    
    pred <-  pred_target[common_id_curr] #predict the dates that have lagged data (omit first two weeks of data)
    pred[] <- NA
    pred_ARIMA700<-pred
    pred_ARIMA710<-pred
    pred_AR7_LR<-pred
    pred2<-pred
    pred_ridge<-pred
    pred2_ridge<-pred
    pred_smooth <-pred_target[common_id_curr]  
    pred_smooth[] <- NA
    pred_full_elastic_net<-pred
    pred_full_elastic_net_no_gt<-pred
    pred_cva_full<-pred
    pred_cva_full_no_gt<-pred
    coef <- matrix(nrow=length(common_id_curr), ncol = ncol(x_curr) + 1)
    coef <- xts(coef, order.by = common_id_curr)
    colnames(coef) <- c("intercept",colnames(x_curr))
    
    coef_ar7_lr<-matrix(nrow=length(common_id_curr), ncol = ncol(x_ar) + 1)
    coef_ar7_lr<-xts(coef_ar7_lr,order.by = common_id_curr)
    colnames(coef_ar7_lr)<-c("intercept",colnames(x_ar))
    
    coef_ar7_arima<-coef_ar7_lr
    colnames(coef_ar7_arima)<-c(colnames(x_ar),"intercept")
    
    coef_full_model_ridge<-coef
    coef_full_model_net<-coef
    
    coef_full_model_ridge_no_gt<-matrix(nrow=length(common_id_curr), ncol = ncol(x_curr2) + 1)
    coef_full_model_ridge_no_gt<-xts(coef_full_model_ridge_no_gt,order.by = common_id_curr)
    colnames(coef_full_model_ridge_no_gt)<-c("intercept",colnames(x_curr2))
    
    coef_smooth <- matrix(nrow=length(common_id_curr), ncol = ncol(x_curr) + 1)
    coef_smooth <- xts(coef_smooth, order.by = common_id_curr)
    colnames(coef_smooth) <- c("intercept",colnames(x_curr))
    
    x_gt_new <- lapply(temp_terms,function(i){stats::lag(gt_nat[,i],pmax(gt_lag[i]-n_forward,0) )})
    x_gt_new = do.call(cbind,x_gt_new)
    x_new <- cbind(
      stats::lag(pred_target, ts_max_lag - 1),  
      stats::lag(state_cases, pmax(case_lag - n_forward, 0)), 
      stats::lag(vaccin_state,pmax(vacc_lag-n_forward,0)),
      x_gt_new,
      x_neighbor,
      stats::lag(Days_of_Week,(7-n_forward%%7))
    )  
    #exclude gt
    x_new2 <- cbind(
      stats::lag(pred_target, ts_max_lag - 1),  
      stats::lag(state_cases, pmax(case_lag - n_forward, 0)), 
      stats::lag(vaccin_state,pmax(vacc_lag-n_forward,0)),
      # x_gt_new,
      x_neighbor,
      stats::lag(Days_of_Week,(7-n_forward%%7))
    )  
    
    for(date_cur in common_id_curr){
      if(as.Date(date_cur)>=as.Date("2020-10-01")){
      
      train_id <- as.Date(date_cur) - ((n_train-1):0) #set training set, use up to 90 days of data to train parameters, when predicting each day use lagged death, cases, wordfreq and mobility
      train_id <- train_id[train_id >= common_id_curr[1]] #keep only dates past the first day of lagged data, 2020-02-04 (since before that no explanatory variables)
      if(length(train_id) >= n_train_minimum){ 
        if (length(which(as.matrix(pred_target[train_id])==0))>(n_train_minimum-5)){ #if too many zeros then perdict death is 0
          coef[as.Date(date_cur),] = 0 
          pred[as.Date(date_cur)] =pred_target[as.Date(date_cur)]
        }else{ #start training only when having at least n_train_minimum days of training data to tain (not considering lagged dates)
          #lasso full model
          
          x_lasso = x_curr[train_id]
          y_lasso = pred_target[index(x_lasso)]
          train_weights <- (decay)^(length(y_lasso):1)
          set.seed(100)
          lasso.fit <- glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                       y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                       alpha = 1, weights = train_weights)
          lam.s <- lasso.fit$lambda.1se #select the lambda that gives the simpliest model within one standard error of optimal val of lambda
          coef[as.Date(date_cur),] <- as.numeric(coef(lasso.fit, lambda = lam.s)) #store the coef for the simpliest lambda in the training date's row (if common_id_curr[40] then stored in row 2020-02-04+39=2020-03-14) for all explanatory variables 
          pred[as.Date(date_cur)] <- predict(lasso.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s) #store prediction for that date ???predict tomorrow???
          pred[as.Date(date_cur)] <- pred[as.Date(date_cur)]*(pred[as.Date(date_cur)]>0)+(pred[as.Date(date_cur)]<=0)*pred_target[as.Date(date_cur)]
          #ridge full model
          ridge.fit <- glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                         y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                         alpha = 0, weights = train_weights)
          lam.s_ridge <- ridge.fit$lambda.1se #select the lambda that gives the simpliest model within one standard error of optimal val of lambda
          pred_ridge[as.Date(date_cur)] <- predict(ridge.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s_ridge) #store prediction for that date ???predict tomorrow???
          pred_ridge[as.Date(date_cur)] <- pred_ridge[as.Date(date_cur)]*(pred_ridge[as.Date(date_cur)]>0)+(pred_ridge[as.Date(date_cur)]<=0)*pred_target[as.Date(date_cur)]
          coef_full_model_ridge[as.Date(date_cur),] <- as.numeric(coef(ridge.fit, lambda = lam.s_ridge))
          #elastic full model
          net.fit <- glmnet::cv.glmnet(x = as.matrix(x_lasso), 
                                       y = as.matrix(y_lasso), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                       alpha = 0.5, weights = train_weights)
          lam.s_ridge <- net.fit$lambda.1se #select the lambda that gives the simpliest model within one standard error of optimal val of lambda
          pred_full_elastic_net[as.Date(date_cur)] <- predict(net.fit, newx = as.matrix(x_new[as.Date(date_cur)]), s = lam.s_ridge) #store prediction for that date ???predict tomorrow???
          pred_full_elastic_net[as.Date(date_cur)] <- pred_full_elastic_net[as.Date(date_cur)]*(pred_full_elastic_net[as.Date(date_cur)]>0)+(pred_full_elastic_net[as.Date(date_cur)]<=0)*pred_target[as.Date(date_cur)]
          
          coef_full_model_net[as.Date(date_cur),] <- as.numeric(coef(net.fit, lambda = lam.s_ridge))
          
          #cv alphas
          
          cva.fit<-cva.glmnet(
            as.matrix(x_lasso),
            as.matrix(y_lasso),
            alpha = alphas,
            nfolds = 10,
            foldid = sample(rep(seq_len(nfolds), length = nrow(x_lasso))),
            outerParallel = NULL,
            checkInnerParallel = TRUE,
            weights=train_weights,
            grouped = FALSE
          )
          
          pred_cva_full[as.Date(date_cur)]<- predict(
            cva.fit,
            newx=as.matrix(x_new[as.Date(date_cur)]),
            get_alpha(cva.fit),
            s="lambda.1se"
          )
         # print(get_alpha(cva.fit))
          
          
          # no gt lasso
          x_lasso2<-x_curr2[train_id]
          y_lasso2<-pred_target[index(x_lasso2)]
          train_weights <- (decay)^(length(y_lasso2):1)
          
          lasso.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                        y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                        alpha = 1, weights = train_weights)
          lam.s2 <- lasso.fit2$lambda.1se
          pred2[as.Date(date_cur)] <- predict(lasso.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
          
          ridge.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                          y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                          alpha = 0, weights = train_weights)
          lam.s2 <- ridge.fit2$lambda.1se
          pred2_ridge[as.Date(date_cur)] <- predict(ridge.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
          coef_full_model_ridge_no_gt[as.Date(date_cur),] <- as.numeric(coef(ridge.fit2, lambda = lam.s2))
          
          #no gt elastic net
          net.fit2 <- glmnet::cv.glmnet(x = as.matrix(x_lasso2), 
                                        y = as.matrix(y_lasso2), nfolds = 10, grouped = FALSE,  #10 fold, alpha=1->lasso
                                        alpha = 0.5, weights = train_weights)
          lam.s2 <- net.fit2$lambda.1se
          pred_full_elastic_net_no_gt[as.Date(date_cur)] <- predict(net.fit2, newx = as.matrix(x_new2[as.Date(date_cur)]), s = lam.s2)
          
          #cv alphas
          cva.fit<-cva.glmnet(
            as.matrix(x_lasso2),
            as.matrix(y_lasso2),
            alpha = alphas,
            nfolds = 10,
            foldid = sample(rep(seq_len(nfolds), length = nrow(x_lasso2))),
            outerParallel = NULL,
            checkInnerParallel = TRUE,
            weights=train_weights,
            grouped = FALSE
            
          )
          
          pred_cva_full_no_gt[as.Date(date_cur)]<- predict(
            cva.fit,
            newx=as.matrix(x_new2[as.Date(date_cur)]),
            get_alpha(cva.fit),
            s="lambda.1se"
          )
          
          # ARIMA AR700
          res<-try(AR700 <- arima(pred_target[train_id] , order = c(7,0,0)),silent = TRUE)
          if(typeof(res[1])=="list"){
            AR700 <- arima(pred_target[train_id] , order = c(7,0,0) )
            pred_ARIMA700[as.Date(date_cur)]<-predict(AR700 , n.ahead = n_forward)$pred[n_forward]
            coef_ar7_arima[as.Date(date_cur),]<-coef(AR700)
          }else{pred_ARIMA700[as.Date(date_cur)] =pred_target[as.Date(date_cur)]}
          # ARIMA AR710
          res<-try(AR7 <- arima(pred_target[train_id] , order = c(7,1,0)),silent = TRUE)
          if(typeof(res[1])=="list"){
            AR7 <- arima(pred_target[train_id] , order = c(7,1,0) )
            pred_ARIMA710[as.Date(date_cur)]<-predict(AR7 , n.ahead = n_forward)$pred[n_forward]
          }else{pred_ARIMA710[as.Date(date_cur)] =pred_target[as.Date(date_cur)]}
          #linear regression AR7
          ar7_lr<-lm(as.matrix(pred_target[train_id])~x_ar[train_id])
          pred_AR7_LR[as.Date(date_cur)]<-as.numeric(coef(ar7_lr)%*%t(cbind(1,x_ar[as.Date(date_cur)])))
          coef_ar7_lr[as.Date(date_cur),]<-coef(ar7_lr)
          
        }
        # if (!is.na(as.numeric(coef[as.Date(date_cur),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-1),])) && 
        #     !is.na(as.numeric(coef[(as.Date(date_cur)-2),])) && !is.na(as.numeric(coef[(as.Date(date_cur)-3),])) && 
        #     !is.na(as.numeric(coef[(as.Date(date_cur)-4),])) ){
        #   temp_coef =  (as.numeric(coef[as.Date(date_cur),])+  as.numeric(coef[(as.Date(date_cur)-1),])+  as.numeric(coef[(as.Date(date_cur)-2),]) + as.numeric(coef[(as.Date(date_cur)-3),]) + as.numeric(coef[(as.Date(date_cur)-4),]))/5
        #   coef_smooth[as.Date(date_cur),] = temp_coef
        #   pred_smooth[as.Date(date_cur)] = sum(temp_coef * c(1,as.numeric(x_new[as.Date(date_cur)])))
        # }
      }
    }}
    ##
    truth <- stats::lag(pred_target, -n_forward) #lag backwards (yesterday has today's data), truth is today predicting tomorrow and upto two weeks head (save to today's row now)
    naive <- pred_target #naive approach simply using the death count today (newest) to predict future (whether it is tomorrow or next week)
    naive_increment <- 2*pred_target-stats::lag(pred_target,n_forward) #second naive approach simply using the death count 7 days ago to predict today. 
    naive_period <- stats::lag(pred_target , (7-n_forward%%7) )
    set.seed(100)
    argo_pre <- pred
    argo_presmooth <- round((pred_smooth))
    #results <- merge(truth, naive, naive_increment, argo_pre, pred_ridge,pred_full_elastic_net,pred_cva_full,pred2,pred2_ridge,pred_full_elastic_net_no_gt,pred_cva_full_no_gt,pred_AR7_LR,pred_ARIMA700,pred_ARIMA710, all = F)
    results <- merge(truth, naive, naive_period,naive_increment,pred,pred_ridge,pred_full_elastic_net,pred_cva_full,pred_ARIMA700,pred_ARIMA710)
    #colnames(results) <- c("truth", "naive", "naive_increment", "full_model_lasso","full_model_ridge", "full_model_elastic_net","full_model_CV_alpha",
                         #  "full_model_no_gt_lasso","full_model_no_gt_ridge","full_model_no_gt_elastic_net","full_model_no_gt_CV_alpha","AR7_LinearRegression","AR7_ARIMA700","AR7_ARIMA710")
    colnames(results) <- c("truth", "naive","naive_period", "naive_increment", "full_model_lasso","full_model_ridge","full_model_elastic_net","full_model_CV_alpha","AR7_ARIMA700","AR7_ARIMA710")
    
    list_results_step1.State[[region]][[n_forward]] <- results  
    coef_all_full_model_lasso[[region]][[n_forward]] <- coef
    #coef_smooth_step1.State[[region]][[n_forward]] <- coef_smooth
    coef_all_full_model_net[[region]][[n_forward]] <- coef_full_model_net
    
    #coef_all_AR7_LR[[region]][[n_forward]]<-coef_ar7_lr
    #coef_all_AR7_ARIMA[[region]][[n_forward]]<-coef_ar7_arima
    coef_all_full_model_ridge[[region]][[n_forward]]<-coef_full_model_ridge
    #coef_all_full_model_no_gt_ridge[[region]][[n_forward]]<-coef_full_model_ridge_no_gt
    
  }
  save(coef_all_full_model_net,coef_all_AR7_LR,coef_all_AR7_ARIMA,
       coef_all_full_model_lasso,coef_all_full_model_ridge,
       coef_all_full_model_no_gt_ridge,list_results_step1.State,file = "state_results_hosp_admission_nat_gt_0117.rda")
}



#save()
#load("prediction_results.rda")
###########################################################################################################################################################
date_selected<-seq(as.Date("2020-10-01"),as.Date("2022-01-01"),by=1)
list_results_State_0901 = list()
pred_week_ = 1:7
for (i in names(list_results_step1.State)){
  list_results_State_0901[["1 week ahead"]][[i]] = lapply(pred_week_, function(x) list_results_step1.State[[i]][[x]][date_selected])
  list_results_State_0901[["1 week ahead"]][[i]]  = Reduce("+", list_results_State_0901[["1 week ahead"]][[i]]) 
  #list_results_State_0328_2021Pred[["1 week ahead"]][[i]] = list_results_State_0328_2021Pred[["1 week ahead"]][[i]]["2021-01/",]
}
pred_week_ = 8:14
for (i in names(list_results_step1.State)){
  list_results_State_0901[["2 week ahead"]][[i]] = lapply(pred_week_, function(x) list_results_step1.State[[i]][[x]][date_selected])
  list_results_State_0901[["2 week ahead"]][[i]]  = Reduce("+", list_results_State_0901[["2 week ahead"]][[i]]) 
  #list_results_State_0328_2021Pred[["1 week ahead"]][[i]] = list_results_State_0328_2021Pred[["1 week ahead"]][[i]]["2021-01/",]
}

save(coef_all_full_model_net,coef_all_AR7_LR,coef_all_AR7_ARIMA,
     coef_all_full_model_lasso,coef_all_full_model_ridge,
     coef_all_full_model_no_gt_ridge,list_results_step1.State,list_results_State_0901,file = "state_results_hosp_admission_nat_gt_0117.rda")

for (state in names(list_results_State_0901[["1 week ahead"]])){
  gc()
  write.xlsx(list_results_State_0901[["1 week ahead"]][[state]], file = "prediction_state_hosp_admission_one_week_nat_gt_0117.xlsx", sheetName=paste(state), append=TRUE)
}
for (state in names(list_results_State_0901[["2 week ahead"]])){
  gc()
  write.xlsx(list_results_State_0901[["2 week ahead"]][[state]], file = "prediction_state_hosp_admission_two_week_nat_gt_0117.xlsx", sheetName=paste(state), append=TRUE)
}


#############################################################################
#Evaluation
#rm(list=ls())

library(Metrics)
library(readxl)
read_excel_allsheets <- function(filename, tibble = FALSE) {
  # I prefer straight data.frames
  # but if you like tidyverse tibbles (the default with read_excel)
  # then just pass tibble = TRUE
  sheets <- readxl::excel_sheets(filename)
  x <- lapply(sheets, function(X) readxl::read_excel(filename, sheet = X))
  if(!tibble) x <- lapply(x, as.data.frame)
  names(x) <- sheets
  x
}
whole_results<-read_excel_allsheets( "prediction_state_hosp_admission_two_week_nat_gt_0117.xlsx")

evalution_metrics_rmse<-matrix(NA,nrow=length(whole_results),ncol=ncol(whole_results[[1]])-2)
colnames(evalution_metrics_rmse)<-colnames(whole_results[[1]])[3:length(colnames(whole_results[[1]]))]
states<-names(whole_results)
#states<-states[-32]
rownames(evalution_metrics_rmse)<-states

evalution_metrics_cor<-evalution_metrics_rmse
evalution_metrics_MAE<-evalution_metrics_rmse
evalution_metrics_MAPE<-evalution_metrics_rmse
prediction_start<-as.Date("2021-01-01")
prediction_end<-as.Date("2022-01-10")
dates<-seq(prediction_start,prediction_end,by=1)
for (state in states){
  curr_results<-na.omit(whole_results[[state]])
  
  curr_results<-xts(curr_results[,-1],order.by = as.Date(curr_results[,1]))
  curr_results<-curr_results[dates]
  
  
  for (curr_method in colnames(curr_results)[2:length(colnames(curr_results))]) {
    evalution_metrics_rmse[state,curr_method]<-rmse(curr_results[,1],curr_results[,curr_method])
    evalution_metrics_cor[state,curr_method]<-cor(curr_results[,1],curr_results[,curr_method])
    evalution_metrics_MAE[state,curr_method]<-mae(curr_results[,1],curr_results[,curr_method])
    evalution_metrics_MAPE[state,curr_method]<-mape(curr_results[,1],curr_results[,curr_method])
  }
  
}

final_evaluation<-matrix(NA,nrow = ncol(evalution_metrics_rmse),ncol = 4)
colnames(final_evaluation)<-c("RMSE","Cor","MAE","MAPE")
rownames(final_evaluation)<-colnames(evalution_metrics_rmse)
final_evaluation[,1]<-round(colMeans(evalution_metrics_rmse),digits = 2)
final_evaluation[,2]<-round(colMeans(evalution_metrics_cor),digits = 3)
final_evaluation[,3]<-round(colMeans(evalution_metrics_MAE),digits = 2)
final_evaluation[,4]<-round(colMeans(evalution_metrics_MAPE),digits = 4)
rownames(final_evaluation)<-colnames(curr_results)[-1]
#write.xlsx(evalution_metrics_rmse,file="Evaluation_metrics_RMSE.xlsx")
write.xlsx(final_evaluation,file="state-two_week.xlsx")

evalution_metrics_rmse<-data.frame(evalution_metrics_rmse)
evalution_metrics_rmse$min_method<-colnames(evalution_metrics_rmse)[apply(evalution_metrics_rmse, 1, FUN = which.min)]
#write.xlsx(evalution_metrics_rmse,file="state-level_neighboring_net rmse.xlsx")

if(FALSE){
  library(RColorBrewer)
  pdf("visualization_all_states_net_mixted_admission_two_week_ahead.pdf")
  for (state in states){
    curr_results<-na.omit(whole_results[[state]])
    curr_results<-xts(curr_results[,-1],order.by = as.Date(curr_results[,1]))
    curr_results<-curr_results[,c(1,4,5,9)]
    curr_results<-curr_results[index(curr_results)>=prediction_start]
    curr_results<-curr_results[index(curr_results)<=prediction_end]
    brewer<-brewer.pal(ncol(curr_results), "Dark2")
    brewer[1]<-'black'
    visual_data<-curr_results
    dates_select<-seq(as.Date("2020-09-01"),as.Date("2021-08-01"),by=1)
    plot(visual_data[dates_select],col=brewer,main = paste("State state_hosp Prediction"," of ",state))
    print(addLegend("bottomleft",legend.names =colnames(visual_data),col=brewer,
                    lty=c(1,1),
                    lwd=c(2,2),
                    ncol=2,
                    bg="white"))
  }
  dev.off()
}



